#---------------------------------------------------------------------------------#
### information: 
#---------------------------------------------------------------------------------#
### authors: björn bremer (bremer@mpifg.de), reto bürgisser (buergisser@ipz.uzh.ch)
### last updated: 22 October 2021

#---------------------------------------------------------------------------------#
### description
#---------------------------------------------------------------------------------#

### this file replicates the analysis of the following paper:
### "Public Opinion on Welfare State Recalibration in Times of Austerity"
### this file focuses on the results from the CONJOINT EXPERIMENT shown in the APPENDIX
### NOTE: The file uses code for parallelization that runs on MacOS and Unix (parallel::mclapply()). 
### NOTE: The code needs to be adjusted to run on Windows (parallel::parLapply()).

#---------------------------------------------------------------------------------#

RNGkind("L'Ecuyer-CMRG")
set.seed(1234567)

#--------------------------------------------------------
### load libraries ###
#--------------------------------------------------------

#library(rstudioapi)
library(plyr)
library(dplyr)
library(ggplot2) 
library(ggpubr)    # necessary to combine plots
library(glmnet)    # necessary for ridge regression
library(parallel)  # necessary for parallel computing
library(doMC)      # necessary for parallel computing
library(broom)     # necessary to get tidy regression output
library(xtable)    # necessary to create and print tex table
library(reshape2)  # necessary to reshape the data inside the function

#--------------------------------------------------------
### set the number of cores ###
#--------------------------------------------------------

registerDoMC(cores = 4)

#--------------------------------------------------------
### load data ###
#--------------------------------------------------------

load("df_cj_psrm_replication.Rdata") # load the file

#--------------------------------------------------------
### bootstrap functions to calculate coefficients and marginal means with ridge regression ###
#--------------------------------------------------------

B <- 1000  # n of bootstrap draws
#B <- 2 ## to test

alpha = .05 # level of statistical significance 

## function to calculate acmes
boot.ridge <- function(x, data){
   a <- unique(data$Response.ID)
   rid.b <- sample(a, length(a), repl = TRUE)
   ind.b <- c()
   for(i in 1:length(rid.b)) ind.b <- c(ind.b, which(data$Response.ID %in% rid.b[i]))
   DD.b <- data[ind.b,]
   x.b <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=DD.b, 
                       contrasts.arg=list(Pensions=contrasts(DD.b$Pensions, contrasts=TRUE),
                                          Education=contrasts(DD.b$Education, contrasts=TRUE),
                                          PLMP=contrasts(DD.b$PLMP, contrasts=TRUE),
                                          ALMP=contrasts(DD.b$ALMP, contrasts=TRUE),
                                          Child.benefits=contrasts(DD.b$Child.benefits, contrasts=TRUE),
                                          Childcare=contrasts(DD.b$Childcare)))
   y.b <- DD.b$selected
   fit.ridge.b <- glmnet(x.b, y.b, alpha = 0,
                         lambda = bestlambda.ridge)
   coefficient.b <- predict(fit.ridge.b, type = "coefficients", s = bestlambda.ridge)
   tidy.b <- tidy(fit.ridge.b)
}

boot.ridge.weights <- function(x, data){
   a <- unique(data$Response.ID)
   rid.b <- sample(a, length(a), repl = TRUE)
   ind.b <- c()
   for(i in 1:length(rid.b)) ind.b <- c(ind.b, which(data$Response.ID %in% rid.b[i]))
   DD.b <- data[ind.b,]
   x.b <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=DD.b, 
                       contrasts.arg=list(Pensions=contrasts(DD.b$Pensions, contrasts=TRUE),
                                          Education=contrasts(DD.b$Education, contrasts=TRUE),
                                          PLMP=contrasts(DD.b$PLMP, contrasts=TRUE),
                                          ALMP=contrasts(DD.b$ALMP, contrasts=TRUE),
                                          Child.benefits=contrasts(DD.b$Child.benefits, contrasts=TRUE),
                                          Childcare=contrasts(DD.b$Childcare)))
   y.b <- DD.b$selected
   fit.ridge.b <- glmnet(x.b, y.b, alpha = 0, weights = DD.b$wgt,
                         lambda = bestlambda.ridge)
   coefficient.b <- predict(fit.ridge.b, type = "coefficients", s = bestlambda.ridge)
   tidy.b <- tidy(fit.ridge.b)
}

## function to calculate marginal means
boot.ridge.mm <- function(x, data){
   a <- unique(data$Response.ID)
   rid.b <- sample(a, length(a), repl = TRUE)
   ind.b <- c()
   for(i in 1:length(rid.b)) ind.b <- c(ind.b, which(data$Response.ID %in% rid.b[i]))
   DD.b <- data[ind.b,]
   x.b <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=DD.b, 
                       contrasts.arg=list(Pensions=contrasts(DD.b$Pensions, contrasts=TRUE),
                                          Education=contrasts(DD.b$Education, contrasts=TRUE),
                                          PLMP=contrasts(DD.b$PLMP, contrasts=TRUE),
                                          ALMP=contrasts(DD.b$ALMP, contrasts=TRUE),
                                          Child.benefits=contrasts(DD.b$Child.benefits, contrasts=TRUE),
                                          Childcare=contrasts(DD.b$Childcare)))
   y.b <- DD.b$selected
   fit.ridge.b <- glmnet(x.b, y.b, alpha = 0,
                         lambda = bestlambda.ridge)
   means.b <- predict(fit.ridge.b, x.b, type = "response", s = bestlambda.ridge)
   mm.b <- data.frame(x.b, fitted = means.b[, 1L, drop = TRUE])
   mm.b$PensionsNochange <- ifelse(mm.b$PensionsIncrease == 0 & mm.b$PensionsDecrease == 0, 1, 0)
   mm.b$EducationNochange <- ifelse(mm.b$EducationIncrease == 0 & mm.b$EducationDecrease == 0, 1, 0)
   mm.b$PLMPNochange <- ifelse(mm.b$PLMPIncrease == 0 & mm.b$PLMPDecrease == 0, 1,0)
   mm.b$ALMPNochange <- ifelse(mm.b$ALMPIncrease == 0 & mm.b$ALMPDecrease == 0, 1,0)
   mm.b$Child.benefitsNochange <- ifelse(mm.b$Child.benefitsIncrease == 0 & mm.b$Child.benefitsDecrease == 0, 1,0)
   mm.b$ChildcareNochange <- ifelse(mm.b$ChildcareIncrease == 0 & mm.b$ChildcareDecrease == 0, 1,0)
   mm_long.b <- melt(mm.b, id.vars = c("fitted"))
   mm_sum.b <- ddply(mm_long.b, c("variable", "value"), summarise,
                     mean=mean(fitted))
   mm_sum.b <- mm_sum.b[which (mm_sum.b$value == 1),]
}

#--------------------------------------------------------
### figure a5 and a6: amces and mm by country ###
#--------------------------------------------------------

###-----------------------------------
## germany

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$country == "DE"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$country == "DE"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients
start_time <- Sys.time()
out_coef_country_de <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$country == "DE"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_country_de <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$country == "DE"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_country_de <- do.call(rbind.data.frame, out_coef_country_de)
df_out_mm_country_de <- do.call(rbind.data.frame, out_mm_country_de)

# calculate mean and higher/lower CI of the coefficients
coef_sum_country_de <- ddply(df_out_coef_country_de, c("term"), summarise,
                             mean = mean(estimate),
                             low=quantile(estimate, alpha / 2),
                             high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_country_de)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_country_de$term <- as.character(df_out_mm_country_de$variable)
df_out_mm_country_de$estimate <- as.numeric(df_out_mm_country_de$mean)

mm_sum_country_de <- ddply(df_out_mm_country_de, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_de)

# include the baseline as a level (only for the coefficients)
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_country_de <- rbind(coef_sum_country_de, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_country_de <- rbind(coef_sum_country_de, ridge_attributes)
mm_sum_country_de <- rbind(mm_sum_country_de, ridge_attributes)

###-----------------------------------
## italy 

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$country == "IT"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$country == "IT"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients
start_time <- Sys.time()
out_coef_country_it <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$country == "IT"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_country_it <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$country == "IT"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_country_it <- do.call(rbind.data.frame, out_coef_country_it)
df_out_mm_country_it <- do.call(rbind.data.frame, out_mm_country_it)

# calculate mean and higher/lower CI of the coefficients

coef_sum_country_it <- ddply(df_out_coef_country_it, c("term"), summarise,
                             mean = mean(estimate),
                             low=quantile(estimate, alpha / 2),
                             high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_country_it)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_country_it$term <- as.character(df_out_mm_country_it$variable)
df_out_mm_country_it$estimate <- as.numeric(df_out_mm_country_it$mean)

mm_sum_country_it <- ddply(df_out_mm_country_it, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_it)

# include the baseline as a level (only for the coefficients)
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_country_it <- rbind(coef_sum_country_it, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_country_it <- rbind(coef_sum_country_it, ridge_attributes)
mm_sum_country_it <- rbind(mm_sum_country_it, ridge_attributes)

###-----------------------------------
## uk

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$country == "UK"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$country == "UK"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients
start_time <- Sys.time()
out_coef_country_uk <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$country == "UK"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_country_uk <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$country == "UK"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_country_uk <- do.call(rbind.data.frame, out_coef_country_uk)
df_out_mm_country_uk <- do.call(rbind.data.frame, out_mm_country_uk)

# calculate mean and higher/lower CI of the coefficients
coef_sum_country_uk <- ddply(df_out_coef_country_uk, c("term"), summarise,
                             mean = mean(estimate),
                             low=quantile(estimate, alpha / 2),
                             high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_country_uk)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_country_uk$term <- as.character(df_out_mm_country_uk$variable)
df_out_mm_country_uk$estimate <- as.numeric(df_out_mm_country_uk$mean)

mm_sum_country_uk <- ddply(df_out_mm_country_uk, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_country_uk)

# include the baseline as a level (only for the coefficients)
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_country_uk <- rbind(coef_sum_country_uk, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_country_uk <- rbind(coef_sum_country_uk, ridge_attributes)
mm_sum_country_uk <- rbind(mm_sum_country_uk, ridge_attributes)

###-------------------------------------------
## combine the coefficients and mm from all groups
coef_sum_country_de$country <- "DE"
coef_sum_country_it$country <- "IT"
coef_sum_country_uk$country <- "UK"

mm_sum_country_de$country <- "DE"
mm_sum_country_it$country<- "IT"
mm_sum_country_uk$country <- "UK"

coef_sum_country <- rbind(coef_sum_country_de, coef_sum_country_it, coef_sum_country_uk)
mm_sum_country <- rbind(mm_sum_country_de, mm_sum_country_it, mm_sum_country_uk)

## plot the coefficients
# rename the levels
coef_sum_country$names_final <- ifelse(coef_sum_country$term == "EducationDecrease", "Decrease education spending",
                                       ifelse(coef_sum_country$term == "EducationIncrease", "Increase education spending",
                                              ifelse(coef_sum_country$term == "PensionsDecrease", "Decrease pension spending",
                                                     ifelse(coef_sum_country$term == "PensionsIncrease", "Increase pension spending",
                                                            ifelse(coef_sum_country$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                   ifelse(coef_sum_country$term == "ALMPIncrease", "Increase ALMP spending",
                                                                          ifelse(coef_sum_country$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                 ifelse(coef_sum_country$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                        ifelse(coef_sum_country$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                               ifelse(coef_sum_country$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                      ifelse(coef_sum_country$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                             ifelse(coef_sum_country$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))

# exclude the intercept from the df
coef_sum_country <- subset(coef_sum_country, term!= "(Intercept)")

# group the levels by attribute
coef_sum_country$group <- ifelse(coef_sum_country$term == "(Education)" | coef_sum_country$term == "EducationNochange" | coef_sum_country$term == "EducationIncrease" | coef_sum_country$term == "EducationDecrease", 1,
                                 ifelse(coef_sum_country$term == "(Pensions)" | coef_sum_country$term == "PensionsNochange" | coef_sum_country$term == "PensionsIncrease" | coef_sum_country$term == "PensionsDecrease", 2,
                                        ifelse(coef_sum_country$term == "(ALMP)" | coef_sum_country$term ==  "ALMPNochange" | coef_sum_country$term == "ALMPIncrease" | coef_sum_country$term == "ALMPDecrease", 3,
                                               ifelse(coef_sum_country$term == "(PLMP)" | coef_sum_country$term == "PLMPNochange" | coef_sum_country$term == "PLMPIncrease" | coef_sum_country$term == "PLMPDecrease", 4,
                                                      ifelse(coef_sum_country$term == "(Childcare)" | coef_sum_country$term == "ChildcareNochange" | coef_sum_country$term == "ChildcareIncrease" | coef_sum_country$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_country$names_ordered <- factor(coef_sum_country$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                           "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                           "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                           "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                           "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                           "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_country <- ggplot(coef_sum_country, aes(x = names_ordered, colour = country)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = country), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Country") +
   labs(shape = "Country") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_country

ggsave(coefplot_country, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a5.eps")

## plot the mm
# rename the levels
mm_sum_country$names_final <- ifelse(mm_sum_country$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(mm_sum_country$term == "EducationIncrease", "Increase education spending",
                                             ifelse(mm_sum_country$term == "EducationNochange", "No change",
                                                    ifelse(mm_sum_country$term == "PensionsDecrease", "Decrease pension spending",
                                                           ifelse(mm_sum_country$term == "PensionsIncrease", "Increase pension spending",
                                                                  ifelse(mm_sum_country$term == "PensionsNochange", "No change",
                                                                         ifelse(mm_sum_country$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                ifelse(mm_sum_country$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                       ifelse(mm_sum_country$term == "ALMPNochange", "No change",
                                                                                              ifelse(mm_sum_country$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                     ifelse(mm_sum_country$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                            ifelse(mm_sum_country$term == "PLMPNochange", "No change",
                                                                                                                   ifelse(mm_sum_country$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                          ifelse(mm_sum_country$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                 ifelse(mm_sum_country$term == "ChildcareNochange", "No change",
                                                                                                                                        ifelse(mm_sum_country$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                               ifelse(mm_sum_country$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                      ifelse(mm_sum_country$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_country <- subset(mm_sum_country, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_country$group <- ifelse(mm_sum_country$term == "(Education)" | mm_sum_country$term == "EducationNochange" | mm_sum_country$term == "EducationIncrease" | mm_sum_country$term == "EducationDecrease", 1,
                               ifelse(mm_sum_country$term == "(Pensions)" | mm_sum_country$term == "PensionsNochange" | mm_sum_country$term == "PensionsIncrease" | mm_sum_country$term == "PensionsDecrease", 2,
                                      ifelse(mm_sum_country$term == "(ALMP)" | mm_sum_country$term ==  "ALMPNochange" | mm_sum_country$term == "ALMPIncrease" | mm_sum_country$term == "ALMPDecrease", 3,
                                             ifelse(mm_sum_country$term == "(PLMP)" | mm_sum_country$term == "PLMPNochange" | mm_sum_country$term == "PLMPIncrease" | mm_sum_country$term == "PLMPDecrease", 4,
                                                    ifelse(mm_sum_country$term == "(Childcare)" | mm_sum_country$term == "ChildcareNochange" | mm_sum_country$term == "ChildcareIncrease" | mm_sum_country$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_country$names_ordered <- factor(mm_sum_country$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                       "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                       "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                       "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                       "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                       "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the mm
mmplot_country <- ggplot(mm_sum_country,aes(x = names_ordered, colour = country)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = country), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Country") +
   labs(shape = "Country") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_country

ggsave(mmplot_country, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a6.eps")

#--------------------------------------------------------
### figure a8: amces with survey weights ###
#--------------------------------------------------------

# delete observations without weights 
df_cj_weights <- df_cj[which (!is.na(df_cj$wgt)),]

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj_weights,
                  contrasts.arg=list(Pensions=contrasts(df_cj_weights$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj_weights$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj_weights$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj_weights$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj_weights$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj_weights$Childcare)))

# create a dv
y <- df_cj_weights$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid,
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate results
start_time <- Sys.time()
out_coef_weights <- mclapply(1:B, boot.ridge.weights, data = df_cj_weights, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_weights <- do.call(rbind.data.frame, out_coef_weights)

# calculate mean and higher/lower CI of the coefficients
coef_sum_weights <- ddply(df_out_coef_weights, c("term"), summarise,
                  mean = mean(estimate),
                  low=quantile(estimate, alpha / 2),
                  high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_weights)

# include the baseline as a level
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange",
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0))

coef_sum_weights <- rbind(coef_sum_weights, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))
coef_sum_weights <- rbind(coef_sum_weights, ridge_attributes)

## plot the coefficients
# rename the levels
coef_sum_weights$names_final <- ifelse(coef_sum_weights$term == "EducationDecrease", "Decrease education spending",
                               ifelse(coef_sum_weights$term == "EducationIncrease", "Increase education spending",
                                      ifelse(coef_sum_weights$term == "PensionsDecrease", "Decrease pension spending",
                                             ifelse(coef_sum_weights$term == "PensionsIncrease", "Increase pension spending",
                                                    ifelse(coef_sum_weights$term == "ALMPDecrease", "Decrease ALMP spending",
                                                           ifelse(coef_sum_weights$term == "ALMPIncrease", "Increase ALMP spending",
                                                                  ifelse(coef_sum_weights$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                         ifelse(coef_sum_weights$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                ifelse(coef_sum_weights$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                       ifelse(coef_sum_weights$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                              ifelse(coef_sum_weights$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                     ifelse(coef_sum_weights$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))

# exclude the intercept from the df
coef_sum_weights <- subset(coef_sum_weights, term!= "(Intercept)")

# group the levels by attribute
coef_sum_weights$group <- ifelse(coef_sum_weights$term == "(Education)" | coef_sum_weights$term == "EducationNochange" | coef_sum_weights$term == "EducationIncrease" | coef_sum_weights$term == "EducationDecrease", 1,
                         ifelse(coef_sum_weights$term == "(Pensions)" | coef_sum_weights$term == "PensionsNochange" | coef_sum_weights$term == "PensionsIncrease" | coef_sum_weights$term == "PensionsDecrease", 2,
                                ifelse(coef_sum_weights$term == "(ALMP)" | coef_sum_weights$term ==  "ALMPNochange" | coef_sum_weights$term == "ALMPIncrease" | coef_sum_weights$term == "ALMPDecrease", 3,
                                       ifelse(coef_sum_weights$term == "(PLMP)" | coef_sum_weights$term == "PLMPNochange" | coef_sum_weights$term == "PLMPIncrease" | coef_sum_weights$term == "PLMPDecrease", 4,
                                              ifelse(coef_sum_weights$term == "(Childcare)" | coef_sum_weights$term == "ChildcareNochange" | coef_sum_weights$term == "ChildcareIncrease" | coef_sum_weights$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_weights$names_ordered <- factor(coef_sum_weights$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                           "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)",
                                                           "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                           "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)",
                                                           "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                           "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_weights <- ggplot(coef_sum_weights,aes(x = names_ordered)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1) +
   geom_point(aes(y=mean), size=1.5) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   theme(legend.position="none") +
   xlab("") + ylab("Change in Pr(Support for reform package)")
coefplot_weights

ggsave(coefplot_weights, width = 15, height = 12, units = c("cm"), file ="figure_a8.eps")

#--------------------------------------------------------
### figure a9: amces with rating variable ###
#--------------------------------------------------------

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj, 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj$rating

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate results
start_time <- Sys.time()
out_coef_rating <- mclapply(1:B, boot.ridge, data = df_cj, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_rating <- do.call(rbind.data.frame, out_coef_rating)

# calculate mean and higher/lower CI of the coefficients
coef_sum_rating <- ddply(df_out_coef_rating, c("term"), summarise,
                  mean = mean(estimate),
                  low=quantile(estimate, alpha / 2),
                  high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_rating)

# include the baseline as a level (only for the coefficients)
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0))

coef_sum_rating <- rbind(coef_sum_rating, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))
coef_sum_rating <- rbind(coef_sum_rating, ridge_attributes)

# rename the levels
coef_sum_rating$names_final <- ifelse(coef_sum_rating$term == "EducationDecrease", "Decrease education spending",
                               ifelse(coef_sum_rating$term == "EducationIncrease", "Increase education spending",
                                      ifelse(coef_sum_rating$term == "PensionsDecrease", "Decrease pension spending",
                                             ifelse(coef_sum_rating$term == "PensionsIncrease", "Increase pension spending",
                                                    ifelse(coef_sum_rating$term == "ALMPDecrease", "Decrease ALMP spending",
                                                           ifelse(coef_sum_rating$term == "ALMPIncrease", "Increase ALMP spending",
                                                                  ifelse(coef_sum_rating$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                         ifelse(coef_sum_rating$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                ifelse(coef_sum_rating$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                       ifelse(coef_sum_rating$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                              ifelse(coef_sum_rating$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                     ifelse(coef_sum_rating$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))

# exclude the intercept from the df
coef_sum_rating <- subset(coef_sum_rating, term!= "(Intercept)")

# group the levels by attribute
coef_sum_rating$group <- ifelse(coef_sum_rating$term == "(Education)" | coef_sum_rating$term == "EducationNochange" | coef_sum_rating$term == "EducationIncrease" | coef_sum_rating$term == "EducationDecrease", 1,
                         ifelse(coef_sum_rating$term == "(Pensions)" | coef_sum_rating$term == "PensionsNochange" | coef_sum_rating$term == "PensionsIncrease" | coef_sum_rating$term == "PensionsDecrease", 2,
                                ifelse(coef_sum_rating$term == "(ALMP)" | coef_sum_rating$term ==  "ALMPNochange" | coef_sum_rating$term == "ALMPIncrease" | coef_sum_rating$term == "ALMPDecrease", 3,
                                       ifelse(coef_sum_rating$term == "(PLMP)" | coef_sum_rating$term == "PLMPNochange" | coef_sum_rating$term == "PLMPIncrease" | coef_sum_rating$term == "PLMPDecrease", 4,
                                              ifelse(coef_sum_rating$term == "(Childcare)" | coef_sum_rating$term == "ChildcareNochange" | coef_sum_rating$term == "ChildcareIncrease" | coef_sum_rating$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_rating$names_ordered <- factor(coef_sum_rating$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                           "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                           "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                           "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                           "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                           "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

coefplot_rating <- ggplot(coef_sum_rating,aes(x = names_ordered)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1) +
   geom_point(aes(y=mean), size=1.5) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   theme(legend.position="none") +
   xlab("") + ylab("Change in Pr(support for reform package)")
coefplot_rating

ggsave(coefplot_rating, width = 15, height = 12, units = c("cm"), file ="figure_a9.eps")

#--------------------------------------------------------
### figure a10: estimated marginal means for retired respondents vs. all others respondents ###
# NOTE: this part uses output from 2_figures_conjoint_main; it can only be run after the other script was fully executed #
#--------------------------------------------------------

load("../../data/results/cj_results_main.RData")

mmplot_retired <- ggplot(mm_sum_retired, aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_retired

ggsave(mmplot_retired, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a10.eps")

#--------------------------------------------------------
### figure a11: estimated marginal means for education constituency vs. all others respondents ###
# NOTE: this part uses output from 2_figures_conjoint_main; it can only be run after the other script was fully executed #
#--------------------------------------------------------

# plot the mm
mmplot_eduben <- ggplot(mm_sum_eduben,aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_eduben

ggsave(mmplot_eduben, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a11.eps")

#--------------------------------------------------------
### figure a12: estimated marginal means for outsiders vs. all others respondents ###
# NOTE: this part uses output from 2_figures_conjoint_main; it can only be run after the other script was fully executed #
#--------------------------------------------------------

# plot the mm
mmplot_outsider <- ggplot(mm_sum_outsider, aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_outsider

ggsave(mmplot_outsider, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a12.eps")

#--------------------------------------------------------
### figure a13: estimated marginal means for parents of young children vs. all others respondents ###
# NOTE: this part uses output from 2_figures_conjoint_main; it can only be run after the other script was fully executed #
#--------------------------------------------------------

# plot the mm
mmplot_children_u10 <- ggplot(mm_sum_children_u10, aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Family composition") +
   labs(shape = "Family composition") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_children_u10

ggsave(mmplot_children_u10, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a13.eps")

#--------------------------------------------------------
### figure a14: estimated marginal means by education level ###
#--------------------------------------------------------

###-----------------------------------
## low education 

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$education_c == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$education_c == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_edu_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$education_c == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_edu_1 <- do.call(rbind.data.frame, out_mm_edu_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_edu_1$term <- as.character(df_out_mm_edu_1$variable)
df_out_mm_edu_1$estimate <- as.numeric(df_out_mm_edu_1$mean)

mm_sum_edu_1 <- ddply(df_out_mm_edu_1, c("term"), summarise,
                      mean = mean(estimate),
                      low=quantile(estimate, alpha / 2),
                      high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_edu_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_edu_1 <- rbind(mm_sum_edu_1, ridge_attributes)

###-----------------------------------
## medium education

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$education_c == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$education_c == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_edu_2 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$education_c == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_edu_2 <- do.call(rbind.data.frame, out_mm_edu_2)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_edu_2$term <- as.character(df_out_mm_edu_2$variable)
df_out_mm_edu_2$estimate <- as.numeric(df_out_mm_edu_2$mean)

mm_sum_edu_2 <- ddply(df_out_mm_edu_2, c("term"), summarise,
                      mean = mean(estimate),
                      low=quantile(estimate, alpha / 2),
                      high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_edu_2)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_edu_2 <- rbind(mm_sum_edu_2, ridge_attributes)

###-----------------------------------
## high education

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$education_c == 3),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$education_c == 3),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_edu_3 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$education_c == 3),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_edu_3 <- do.call(rbind.data.frame, out_mm_edu_3)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_edu_3$term <- as.character(df_out_mm_edu_3$variable)
df_out_mm_edu_3$estimate <- as.numeric(df_out_mm_edu_3$mean)

mm_sum_edu_3 <- ddply(df_out_mm_edu_3, c("term"), summarise,
                      mean = mean(estimate),
                      low=quantile(estimate, alpha / 2),
                      high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_edu_3)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_edu_3 <- rbind(mm_sum_edu_3, ridge_attributes)

###-------------------------------------------
## combine the coefficients and mms from all groups

mm_sum_edu_1$edu <- "Low"
mm_sum_edu_2$edu <- "Medium"
mm_sum_edu_3$edu <- "High"

mm_sum_edu <- rbind(mm_sum_edu_1, mm_sum_edu_2, mm_sum_edu_3)

# order edu groups
mm_sum_edu$edu <- factor(mm_sum_edu$edu, levels = c("Low", "Medium", "High"))

## plot the mms
# rename the levels
mm_sum_edu$names_final <- ifelse(mm_sum_edu$term == "EducationDecrease", "Decrease education spending",
                                 ifelse(mm_sum_edu$term == "EducationIncrease", "Increase education spending",
                                        ifelse(mm_sum_edu$term == "EducationNochange", "No change",
                                               ifelse(mm_sum_edu$term == "PensionsDecrease", "Decrease pension spending",
                                                      ifelse(mm_sum_edu$term == "PensionsIncrease", "Increase pension spending",
                                                             ifelse(mm_sum_edu$term == "PensionsNochange", "No change",
                                                                    ifelse(mm_sum_edu$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                           ifelse(mm_sum_edu$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                  ifelse(mm_sum_edu$term == "ALMPNochange", "No change",
                                                                                         ifelse(mm_sum_edu$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                ifelse(mm_sum_edu$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                       ifelse(mm_sum_edu$term == "PLMPNochange", "No change",
                                                                                                              ifelse(mm_sum_edu$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                     ifelse(mm_sum_edu$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                            ifelse(mm_sum_edu$term == "ChildcareNochange", "No change",
                                                                                                                                   ifelse(mm_sum_edu$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                          ifelse(mm_sum_edu$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                 ifelse(mm_sum_edu$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_edu <- subset(mm_sum_edu, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_edu$group <- ifelse(mm_sum_edu$term == "(Education)" | mm_sum_edu$term == "EducationNochange" | mm_sum_edu$term == "EducationIncrease" | mm_sum_edu$term == "EducationDecrease", 1,
                           ifelse(mm_sum_edu$term == "(Pensions)" | mm_sum_edu$term == "PensionsNochange" | mm_sum_edu$term == "PensionsIncrease" | mm_sum_edu$term == "PensionsDecrease", 2,
                                  ifelse(mm_sum_edu$term == "(ALMP)" | mm_sum_edu$term ==  "ALMPNochange" | mm_sum_edu$term == "ALMPIncrease" | mm_sum_edu$term == "ALMPDecrease", 3,
                                         ifelse(mm_sum_edu$term == "(PLMP)" | mm_sum_edu$term == "PLMPNochange" | mm_sum_edu$term == "PLMPIncrease" | mm_sum_edu$term == "PLMPDecrease", 4,
                                                ifelse(mm_sum_edu$term == "(Childcare)" | mm_sum_edu$term == "ChildcareNochange" | mm_sum_edu$term == "ChildcareIncrease" | mm_sum_edu$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_edu$names_ordered <- factor(mm_sum_edu$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                               "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                               "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                               "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                               "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                               "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the mm
mmplot_edu <- ggplot(mm_sum_edu,aes(x = names_ordered, colour = edu)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = edu), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Education level") +
   labs(shape = "Education level") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_edu

ggsave(mmplot_edu, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a14.eps")

#--------------------------------------------------------
### figure a15: estimated marginal means by income ###
#--------------------------------------------------------

###-----------------------------------
## low income

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$income_brackets_three == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$income_brackets_three == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_income_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$income_brackets_three == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_income_1 <- do.call(rbind.data.frame, out_mm_income_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_income_1$term <- as.character(df_out_mm_income_1$variable)
df_out_mm_income_1$estimate <- as.numeric(df_out_mm_income_1$mean)

mm_sum_income_1 <- ddply(df_out_mm_income_1, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_income_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_income_1 <- rbind(mm_sum_income_1, ridge_attributes)

###-----------------------------------
## medium income

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$income_brackets_three == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$income_brackets_three == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_income_2 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$income_brackets_three == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_income_2 <- do.call(rbind.data.frame, out_mm_income_2)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_income_2$term <- as.character(df_out_mm_income_2$variable)
df_out_mm_income_2$estimate <- as.numeric(df_out_mm_income_2$mean)

mm_sum_income_2 <- ddply(df_out_mm_income_2, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_income_2)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_income_2 <- rbind(mm_sum_income_2, ridge_attributes)

###-----------------------------------
## high income

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$income_brackets_three == 3),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$income_brackets_three == 3),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_income_3 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$income_brackets_three == 3),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_income_3 <- do.call(rbind.data.frame, out_mm_income_3)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_income_3$term <- as.character(df_out_mm_income_3$variable)
df_out_mm_income_3$estimate <- as.numeric(df_out_mm_income_3$mean)

mm_sum_income_3 <- ddply(df_out_mm_income_3, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_income_3)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_income_3 <- rbind(mm_sum_income_3, ridge_attributes)

###-------------------------------------------
## combine the coefficients and mms from all groups
mm_sum_income_1$income <- "Low"
mm_sum_income_2$income <- "Medium"
mm_sum_income_3$income <- "High"

mm_sum_income <- rbind(mm_sum_income_1, mm_sum_income_2, mm_sum_income_3)

# order income groups
mm_sum_income$income <- factor(mm_sum_income$income, levels = c("Low", "Medium", "High"))

## plot the mms
# rename the levels
mm_sum_income$names_final <- ifelse(mm_sum_income$term == "EducationDecrease", "Decrease education spending",
                                    ifelse(mm_sum_income$term == "EducationIncrease", "Increase education spending",
                                           ifelse(mm_sum_income$term == "EducationNochange", "No change",
                                                  ifelse(mm_sum_income$term == "PensionsDecrease", "Decrease pension spending",
                                                         ifelse(mm_sum_income$term == "PensionsIncrease", "Increase pension spending",
                                                                ifelse(mm_sum_income$term == "PensionsNochange", "No change",
                                                                       ifelse(mm_sum_income$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                              ifelse(mm_sum_income$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                     ifelse(mm_sum_income$term == "ALMPNochange", "No change",
                                                                                            ifelse(mm_sum_income$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                   ifelse(mm_sum_income$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                          ifelse(mm_sum_income$term == "PLMPNochange", "No change",
                                                                                                                 ifelse(mm_sum_income$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                        ifelse(mm_sum_income$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                               ifelse(mm_sum_income$term == "ChildcareNochange", "No change",
                                                                                                                                      ifelse(mm_sum_income$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                             ifelse(mm_sum_income$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                    ifelse(mm_sum_income$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_income <- subset(mm_sum_income, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_income$group <- ifelse(mm_sum_income$term == "(Education)" | mm_sum_income$term == "EducationNochange" | mm_sum_income$term == "EducationIncrease" | mm_sum_income$term == "EducationDecrease", 1,
                              ifelse(mm_sum_income$term == "(Pensions)" | mm_sum_income$term == "PensionsNochange" | mm_sum_income$term == "PensionsIncrease" | mm_sum_income$term == "PensionsDecrease", 2,
                                     ifelse(mm_sum_income$term == "(ALMP)" | mm_sum_income$term ==  "ALMPNochange" | mm_sum_income$term == "ALMPIncrease" | mm_sum_income$term == "ALMPDecrease", 3,
                                            ifelse(mm_sum_income$term == "(PLMP)" | mm_sum_income$term == "PLMPNochange" | mm_sum_income$term == "PLMPIncrease" | mm_sum_income$term == "PLMPDecrease", 4,
                                                   ifelse(mm_sum_income$term == "(Childcare)" | mm_sum_income$term == "ChildcareNochange" | mm_sum_income$term == "ChildcareIncrease" | mm_sum_income$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_income$names_ordered <- factor(mm_sum_income$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                     "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                     "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                     "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                     "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                     "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the mm
mmplot_income <- ggplot(mm_sum_income, aes(x = names_ordered, colour = income)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = income), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Income level") +
   labs(shape = "Income level") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_income

ggsave(mmplot_income, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a15.eps")

#--------------------------------------------------------
### figure a16: estimated marginal means by left-right placement ###
#--------------------------------------------------------

###-----------------------------------
## left

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$ideology == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$ideology == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_ideology_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$ideology == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_ideology_1 <- do.call(rbind.data.frame, out_mm_ideology_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_ideology_1$term <- as.character(df_out_mm_ideology_1$variable)
df_out_mm_ideology_1$estimate <- as.numeric(df_out_mm_ideology_1$mean)

mm_sum_ideology_1 <- ddply(df_out_mm_ideology_1, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_ideology_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_ideology_1 <- rbind(mm_sum_ideology_1, ridge_attributes)

###-----------------------------------
## center

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$ideology == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$ideology == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_ideology_2 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$ideology == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_ideology_2 <- do.call(rbind.data.frame, out_mm_ideology_2)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_ideology_2$term <- as.character(df_out_mm_ideology_2$variable)
df_out_mm_ideology_2$estimate <- as.numeric(df_out_mm_ideology_2$mean)

mm_sum_ideology_2 <- ddply(df_out_mm_ideology_2, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_ideology_2)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_ideology_2 <- rbind(mm_sum_ideology_2, ridge_attributes)

###-----------------------------------
## right

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$ideology == 3),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$ideology == 3),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_ideology_3 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$ideology == 3),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_ideology_3 <- do.call(rbind.data.frame, out_mm_ideology_3)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_ideology_3$term <- as.character(df_out_mm_ideology_3$variable)
df_out_mm_ideology_3$estimate <- as.numeric(df_out_mm_ideology_3$mean)

mm_sum_ideology_3 <- ddply(df_out_mm_ideology_3, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_ideology_3)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_ideology_3 <- rbind(mm_sum_ideology_3, ridge_attributes)

###-------------------------------------------
## combine the coefficients and mms from all groups
mm_sum_ideology_1$ideology <- "Left"
mm_sum_ideology_2$ideology <- "Center"
mm_sum_ideology_3$ideology <- "Right"

mm_sum_ideology <- rbind(mm_sum_ideology_1, mm_sum_ideology_2, mm_sum_ideology_3)

# order ideology groups
mm_sum_ideology$ideology <- factor(mm_sum_ideology$ideology, levels = c("Left", "Center", "Right"))

## plot the mms
# rename the levels
mm_sum_ideology$names_final <- ifelse(mm_sum_ideology$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(mm_sum_ideology$term == "EducationIncrease", "Increase education spending",
                                             ifelse(mm_sum_ideology$term == "EducationNochange", "No change",
                                                    ifelse(mm_sum_ideology$term == "PensionsDecrease", "Decrease pension spending",
                                                           ifelse(mm_sum_ideology$term == "PensionsIncrease", "Increase pension spending",
                                                                  ifelse(mm_sum_ideology$term == "PensionsNochange", "No change",
                                                                         ifelse(mm_sum_ideology$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                ifelse(mm_sum_ideology$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                       ifelse(mm_sum_ideology$term == "ALMPNochange", "No change",
                                                                                              ifelse(mm_sum_ideology$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                     ifelse(mm_sum_ideology$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                            ifelse(mm_sum_ideology$term == "PLMPNochange", "No change",
                                                                                                                   ifelse(mm_sum_ideology$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                          ifelse(mm_sum_ideology$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                 ifelse(mm_sum_ideology$term == "ChildcareNochange", "No change",
                                                                                                                                        ifelse(mm_sum_ideology$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                               ifelse(mm_sum_ideology$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                      ifelse(mm_sum_ideology$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_ideology <- subset(mm_sum_ideology, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_ideology$group <- ifelse(mm_sum_ideology$term == "(Education)" | mm_sum_ideology$term == "EducationNochange" | mm_sum_ideology$term == "EducationIncrease" | mm_sum_ideology$term == "EducationDecrease", 1,
                                ifelse(mm_sum_ideology$term == "(Pensions)" | mm_sum_ideology$term == "PensionsNochange" | mm_sum_ideology$term == "PensionsIncrease" | mm_sum_ideology$term == "PensionsDecrease", 2,
                                       ifelse(mm_sum_ideology$term == "(ALMP)" | mm_sum_ideology$term ==  "ALMPNochange" | mm_sum_ideology$term == "ALMPIncrease" | mm_sum_ideology$term == "ALMPDecrease", 3,
                                              ifelse(mm_sum_ideology$term == "(PLMP)" | mm_sum_ideology$term == "PLMPNochange" | mm_sum_ideology$term == "PLMPIncrease" | mm_sum_ideology$term == "PLMPDecrease", 4,
                                                     ifelse(mm_sum_ideology$term == "(Childcare)" | mm_sum_ideology$term == "ChildcareNochange" | mm_sum_ideology$term == "ChildcareIncrease" | mm_sum_ideology$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_ideology$names_ordered <- factor(mm_sum_ideology$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                         "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                         "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                         "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                         "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                         "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))


# plot the mm
mmplot_ideology <- ggplot(mm_sum_ideology, aes(x = names_ordered, colour = ideology)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = ideology), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Left-right placement") +
   labs(shape = "Left-right placement") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_ideology

ggsave(mmplot_ideology, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a16.eps")

#--------------------------------------------------------
### figure a17: estimated acmes by task ###
#--------------------------------------------------------

###-----------------------------------
## task 1

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$task == 1),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_task_1 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_task_1 <- do.call(rbind.data.frame, out_coef_task_1)

# calculate mean and higher/lower CI of the estimates
coef_sum_task_1 <- ddply(df_out_coef_task_1, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_task_1)

# include the baseline as a level
rigde_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_task_1 <- rbind(coef_sum_task_1, rigde_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_task_1 <- rbind(coef_sum_task_1, ridge_attributes)

###-----------------------------------
## task 2

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$task == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_task_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_task_2 <- do.call(rbind.data.frame, out_coef_task_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_task_2 <- ddply(df_out_coef_task_2, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_task_2)

# include the baseline as a level (only for the coefficients)
coef_sum_task_2 <- rbind(coef_sum_task_2, rigde_baselines)

# include the attribute name as a level
coef_sum_task_2 <- rbind(coef_sum_task_2, ridge_attributes)

###-----------------------------------
## task 3

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$task == 3),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task == 3),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_task_3 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task == 3),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_task_3 <- do.call(rbind.data.frame, out_coef_task_3)

# calculate mean and higher/lower CI of the estimates
coef_sum_task_3 <- ddply(df_out_coef_task_3, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_task_3)

# include the baseline as a level
coef_sum_task_3 <- rbind(coef_sum_task_3, rigde_baselines)

# include the attribute name as a level
coef_sum_task_3 <- rbind(coef_sum_task_3, ridge_attributes)

###-----------------------------------
## task 4

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$task == 4),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task == 4),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_task_4 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task == 4),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_task_4 <- do.call(rbind.data.frame, out_coef_task_4)

# calculate mean and higher/lower CI of the estimates
coef_sum_task_4 <- ddply(df_out_coef_task_4, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_task_4)

# include the baseline as a level
coef_sum_task_4 <- rbind(coef_sum_task_4, rigde_baselines)

# include the attribute name as a level
coef_sum_task_4 <- rbind(coef_sum_task_4, ridge_attributes)

###-----------------------------------
## task 5

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$task == 5),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task == 5),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_task_5 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task == 5),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_task_5 <- do.call(rbind.data.frame, out_coef_task_5)

# calculate mean and higher/lower CI of the estimates
coef_sum_task_5 <- ddply(df_out_coef_task_5, c("term"), summarise,
                         mean = mean(estimate),
                         low=quantile(estimate, alpha / 2),
                         high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_task_5)

# include the baseline as a level
coef_sum_task_5 <- rbind(coef_sum_task_5, rigde_baselines)

# include the attribute name as a level
coef_sum_task_5 <- rbind(coef_sum_task_5, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all tasks
coef_sum_task_1$task <- "Task 1"
coef_sum_task_2$task <- "Task 2"
coef_sum_task_3$task <- "Task 3"
coef_sum_task_4$task <- "Task 4"
coef_sum_task_5$task <- "Task 5"

coef_sum_task <- rbind(coef_sum_task_1, coef_sum_task_2, coef_sum_task_3, coef_sum_task_4, coef_sum_task_5)

## plot the coefficients
# rename the levels
coef_sum_task$names_final <- ifelse(coef_sum_task$term == "EducationDecrease", "Decrease education spending",
                                    ifelse(coef_sum_task$term == "EducationIncrease", "Increase education spending",
                                           ifelse(coef_sum_task$term == "PensionsDecrease", "Decrease pension spending",
                                                  ifelse(coef_sum_task$term == "PensionsIncrease", "Increase pension spending",
                                                         ifelse(coef_sum_task$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                ifelse(coef_sum_task$term == "ALMPIncrease", "Increase ALMP spending",
                                                                       ifelse(coef_sum_task$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                              ifelse(coef_sum_task$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                     ifelse(coef_sum_task$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                            ifelse(coef_sum_task$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                   ifelse(coef_sum_task$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                          ifelse(coef_sum_task$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))



# exclude the intercept from the df
coef_sum_task <- subset(coef_sum_task, term!= "(Intercept)")

# group the levels by attribute
coef_sum_task$group <- ifelse(coef_sum_task$term == "(Education)" | coef_sum_task$term == "EducationNochange" | coef_sum_task$term == "EducationIncrease" | coef_sum_task$term == "EducationDecrease", 1,
                              ifelse(coef_sum_task$term == "(Pensions)" | coef_sum_task$term == "PensiosNochange" | coef_sum_task$term == "PensionsIncrease" | coef_sum_task$term == "PensionsDecrease", 2,
                                     ifelse(coef_sum_task$term == "(ALMP)" | coef_sum_task$term ==  "ALMPNochange" | coef_sum_task$term == "ALMPIncrease" | coef_sum_task$term == "ALMPDecrease", 3,
                                            ifelse(coef_sum_task$term == "(PLMP)" | coef_sum_task$term == "PLMPNochange" | coef_sum_task$term == "PLMPIncrease" | coef_sum_task$term == "PLMPDecrease", 4,
                                                   ifelse(coef_sum_task$term == "(Childcare)" | coef_sum_task$term == "ChildcareNochange" | coef_sum_task$term == "ChildcareIncrease" | coef_sum_task$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_task$names_ordered <- factor(coef_sum_task$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                     "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                     "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                     "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                     "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                     "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_task <- ggplot(coef_sum_task, aes(x = names_ordered, colour = task)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = task), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Task") +
   labs(shape = "Task") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_task

ggsave(coefplot_task, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a17.eps")

#--------------------------------------------------------
### figure a18: amces by profile ###
#--------------------------------------------------------

###-----------------------------------
## profile 1

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$profile == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$profile == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_profile_1 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$profile == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_profile_1 <- do.call(rbind.data.frame, out_coef_profile_1)
df_out_mm_profile_1 <- do.call(rbind.data.frame, out_mm_profile_1)

# calculate mean and higher/lower CI of the estimatee
coef_sum_profile_1 <- ddply(df_out_coef_profile_1, c("term"), summarise,
                            mean = mean(estimate),
                            low=quantile(estimate, alpha / 2),
                            high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_profile_1)

# include the baseline as a level
rigde_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_profile_1 <- rbind(coef_sum_profile_1, rigde_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_profile_1 <- rbind(coef_sum_profile_1, ridge_attributes)

###-----------------------------------
## profile 2

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$profile == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$profile == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients with ridge regression
start_time <- Sys.time()
out_coef_profile_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$profile == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_profile_2 <- do.call(rbind.data.frame, out_coef_profile_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_profile_2 <- ddply(df_out_coef_profile_2, c("term"), summarise,
                            mean = mean(estimate),
                            low=quantile(estimate, alpha / 2),
                            high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_profile_2)

# include the baseline as a level
coef_sum_profile_2 <- rbind(coef_sum_profile_2, rigde_baselines)

# include the attribute name as a level
coef_sum_profile_2 <- rbind(coef_sum_profile_2, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all profiles
coef_sum_profile_1$profile <- "Profile 1"
coef_sum_profile_2$profile <- "Profile 2"

coef_sum_profile <- rbind(coef_sum_profile_1, coef_sum_profile_2)

## plot the coefficients
# rename the levels
coef_sum_profile$names_final <- ifelse(coef_sum_profile$term == "EducationDecrease", "Decrease education spending",
                                       ifelse(coef_sum_profile$term == "EducationIncrease", "Increase education spending",
                                              ifelse(coef_sum_profile$term == "PensionsDecrease", "Decrease pension spending",
                                                     ifelse(coef_sum_profile$term == "PensionsIncrease", "Increase pension spending",
                                                            ifelse(coef_sum_profile$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                   ifelse(coef_sum_profile$term == "ALMPIncrease", "Increase ALMP spending",
                                                                          ifelse(coef_sum_profile$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                 ifelse(coef_sum_profile$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                        ifelse(coef_sum_profile$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                               ifelse(coef_sum_profile$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                      ifelse(coef_sum_profile$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                             ifelse(coef_sum_profile$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))



# exclude the intercept from the df
coef_sum_profile <- subset(coef_sum_profile, term!= "(Intercept)")

# group the levels by attribute
coef_sum_profile$group <- ifelse(coef_sum_profile$term == "(Education)" | coef_sum_profile$term == "EducationNochange" | coef_sum_profile$term == "EducationIncrease" | coef_sum_profile$term == "EducationDecrease", 1,
                                 ifelse(coef_sum_profile$term == "(Pensions)" | coef_sum_profile$term == "PensiosNochange" | coef_sum_profile$term == "PensionsIncrease" | coef_sum_profile$term == "PensionsDecrease", 2,
                                        ifelse(coef_sum_profile$term == "(ALMP)" | coef_sum_profile$term ==  "ALMPNochange" | coef_sum_profile$term == "ALMPIncrease" | coef_sum_profile$term == "ALMPDecrease", 3,
                                               ifelse(coef_sum_profile$term == "(PLMP)" | coef_sum_profile$term == "PLMPNochange" | coef_sum_profile$term == "PLMPIncrease" | coef_sum_profile$term == "PLMPDecrease", 4,
                                                      ifelse(coef_sum_profile$term == "(Childcare)" | coef_sum_profile$term == "ChildcareNochange" | coef_sum_profile$term == "ChildcareIncrease" | coef_sum_profile$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_profile$names_ordered <- factor(coef_sum_profile$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                           "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                           "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                           "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                           "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                           "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_profile <- ggplot(coef_sum_profile, aes(x = names_ordered, colour = profile)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = profile), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Profile") +
   labs(shape = "Profile") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_profile

ggsave(coefplot_profile, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a18.eps")

#--------------------------------------------------------
### figure a19: amces for first two tasks only ###
#--------------------------------------------------------

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj [which (df_cj$task <= 2),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$task <= 2),]$selected

# set the number of cores
registerDoMC(cores = 10)

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate coefficients
start_time <- Sys.time()
out_coef_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$task <= 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_2 <- do.call(rbind.data.frame, out_coef_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_2 <- ddply(df_out_coef_2, c("term"), summarise,
                    mean = mean(estimate),
                    low=quantile(estimate, alpha / 2),
                    high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_2)

# include the baseline as a level
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_2 <- rbind(coef_sum_2, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_2 <- rbind(coef_sum_2, ridge_attributes)

## plot the coefficients
# rename the levels
coef_sum_2$names_final <- ifelse(coef_sum_2$term == "EducationDecrease", "Decrease education spending",
                                 ifelse(coef_sum_2$term == "EducationIncrease", "Increase education spending",
                                        ifelse(coef_sum_2$term == "PensionsDecrease", "Decrease pension spending",
                                               ifelse(coef_sum_2$term == "PensionsIncrease", "Increase pension spending",
                                                      ifelse(coef_sum_2$term == "ALMPDecrease", "Decrease ALMP spending",
                                                             ifelse(coef_sum_2$term == "ALMPIncrease", "Increase ALMP spending",
                                                                    ifelse(coef_sum_2$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                           ifelse(coef_sum_2$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                  ifelse(coef_sum_2$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                         ifelse(coef_sum_2$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                ifelse(coef_sum_2$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                       ifelse(coef_sum_2$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))

# exclude the intercept from the df
coef_sum_2 <- subset(coef_sum_2, term!= "(Intercept)")

# group the levels by attribute
coef_sum_2$group <- ifelse(coef_sum_2$term == "(Education)" | coef_sum_2$term == "EducationNochange" | coef_sum_2$term == "EducationIncrease" | coef_sum_2$term == "EducationDecrease", 1,
                           ifelse(coef_sum_2$term == "(Pensions)" | coef_sum_2$term == "PensionsNochange" | coef_sum_2$term == "PensionsIncrease" | coef_sum_2$term == "PensionsDecrease", 2,
                                  ifelse(coef_sum_2$term == "(ALMP)" | coef_sum_2$term ==  "ALMPNochange" | coef_sum_2$term == "ALMPIncrease" | coef_sum_2$term == "ALMPDecrease", 3,
                                         ifelse(coef_sum_2$term == "(PLMP)" | coef_sum_2$term == "PLMPNochange" | coef_sum_2$term == "PLMPIncrease" | coef_sum_2$term == "PLMPDecrease", 4,
                                                ifelse(coef_sum_2$term == "(Childcare)" | coef_sum_2$term == "ChildcareNochange" | coef_sum_2$term == "ChildcareIncrease" | coef_sum_2$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_2$names_ordered <- factor(coef_sum_2$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                               "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)",
                                                               "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                               "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)",
                                                               "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                               "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_2 <- ggplot(coef_sum_2,aes(x = names_ordered)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1) +
   geom_point(aes(y=mean), size=1.5) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() +
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   theme(legend.position="none") +
   xlab("") + ylab("Change in Pr(Support for reform package)")
coefplot_2

ggsave(coefplot_2, width = 15, height = 12, units = c("cm"), file ="figure_a19.eps")

#--------------------------------------------------------
### figure a20: amces by speed ###
#--------------------------------------------------------

###-----------------------------------
## speed: first quartile

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$speed == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$speed == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_speed_1 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$speed == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_speed_1 <- do.call(rbind.data.frame, out_coef_speed_1)

# calculate mean and higher/lower CI of the estimates
coef_sum_speed_1 <- ddply(df_out_coef_speed_1, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_speed_1)

# include the baseline as a level
rigde_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_speed_1 <- rbind(coef_sum_speed_1, rigde_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_speed_1 <- rbind(coef_sum_speed_1, ridge_attributes)

###-----------------------------------
## speed 2

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$speed == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$speed == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate results with ridge regression
start_time <- Sys.time()
out_coef_speed_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$speed == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_speed_2 <- do.call(rbind.data.frame, out_coef_speed_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_speed_2 <- ddply(df_out_coef_speed_2, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_speed_2)

# include the baseline as a level
coef_sum_speed_2 <- rbind(coef_sum_speed_2, rigde_baselines)

# include the attribute name as a level
coef_sum_speed_2 <- rbind(coef_sum_speed_2, ridge_attributes)

###-----------------------------------
## speed 3

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$speed == 3),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$speed == 3),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate results with ridge regression
start_time <- Sys.time()
out_coef_speed_3 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$speed == 3),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_speed_3 <- do.call(rbind.data.frame, out_coef_speed_3)

# calculate mean and higher/lower CI of the estimates
coef_sum_speed_3 <- ddply(df_out_coef_speed_3, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_speed_3)

# include the baseline as a level
coef_sum_speed_3 <- rbind(coef_sum_speed_3, rigde_baselines)

# include the attribute name as a level
coef_sum_speed_3 <- rbind(coef_sum_speed_3, ridge_attributes)

###-----------------------------------
## speed 4

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$speed == 4),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$speed == 4),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_speed_4 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$speed == 4),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_speed_4 <- do.call(rbind.data.frame, out_coef_speed_4)

# calculate mean and higher/lower CI of the estimates
coef_sum_speed_4 <- ddply(df_out_coef_speed_4, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_speed_4)

# include the baseline as a level
coef_sum_speed_4 <- rbind(coef_sum_speed_4, rigde_baselines)

# include the attribute name as a level
coef_sum_speed_4 <- rbind(coef_sum_speed_4, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all speeds
coef_sum_speed_1$speed <- "1"
coef_sum_speed_2$speed <- "2"
coef_sum_speed_3$speed <- "3"
coef_sum_speed_4$speed <- "4"

coef_sum_speed <- rbind(coef_sum_speed_1, coef_sum_speed_2, coef_sum_speed_3, coef_sum_speed_4)

# rename the levels
coef_sum_speed$names_final <- ifelse(coef_sum_speed$term == "EducationDecrease", "Decrease education spending",
                                     ifelse(coef_sum_speed$term == "EducationIncrease", "Increase education spending",
                                            ifelse(coef_sum_speed$term == "PensionsDecrease", "Decrease pension spending",
                                                   ifelse(coef_sum_speed$term == "PensionsIncrease", "Increase pension spending",
                                                          ifelse(coef_sum_speed$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                 ifelse(coef_sum_speed$term == "ALMPIncrease", "Increase ALMP spending",
                                                                        ifelse(coef_sum_speed$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                               ifelse(coef_sum_speed$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                      ifelse(coef_sum_speed$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                             ifelse(coef_sum_speed$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                    ifelse(coef_sum_speed$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                           ifelse(coef_sum_speed$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))



# exclude the intercept from the df
coef_sum_speed <- subset(coef_sum_speed, term!= "(Intercept)")

# group the levels by attribute
coef_sum_speed$group <- ifelse(coef_sum_speed$term == "(Education)" | coef_sum_speed$term == "EducationNochange" | coef_sum_speed$term == "EducationIncrease" | coef_sum_speed$term == "EducationDecrease", 1,
                               ifelse(coef_sum_speed$term == "(Pensions)" | coef_sum_speed$term == "PensiosNochange" | coef_sum_speed$term == "PensionsIncrease" | coef_sum_speed$term == "PensionsDecrease", 2,
                                      ifelse(coef_sum_speed$term == "(ALMP)" | coef_sum_speed$term ==  "ALMPNochange" | coef_sum_speed$term == "ALMPIncrease" | coef_sum_speed$term == "ALMPDecrease", 3,
                                             ifelse(coef_sum_speed$term == "(PLMP)" | coef_sum_speed$term == "PLMPNochange" | coef_sum_speed$term == "PLMPIncrease" | coef_sum_speed$term == "PLMPDecrease", 4,
                                                    ifelse(coef_sum_speed$term == "(Childcare)" | coef_sum_speed$term == "ChildcareNochange" | coef_sum_speed$term == "ChildcareIncrease" | coef_sum_speed$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_speed$names_ordered <- factor(coef_sum_speed$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                       "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                       "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                       "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                       "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                       "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_speed <- ggplot(coef_sum_speed, aes(x = names_ordered, colour = speed)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = speed), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Quantile") +
   labs(shape = "Quantile") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_speed

ggsave(coefplot_speed, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a20.eps")

#--------------------------------------------------------
### figure a21: estimated amces by conjoint number ###
#--------------------------------------------------------

df_cj$conjoint <- as.numeric(as.character(df_cj$conjoint))

###-----------------------------------
## conjoint 1

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$conjoint == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$conjoint == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_conjoint_1 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$conjoint == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_conjoint_1 <- do.call(rbind.data.frame, out_coef_conjoint_1)

# calculate mean and higher/lower CI of the estimates
coef_sum_conjoint_1 <- ddply(df_out_coef_conjoint_1, c("term"), summarise,
                             mean = mean(estimate),
                             low=quantile(estimate, alpha / 2),
                             high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_conjoint_1)

# include the baseline as a level
rigde_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_conjoint_1 <- rbind(coef_sum_conjoint_1, rigde_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_conjoint_1 <- rbind(mm_sum_conjoint_1, ridge_attributes)

###-----------------------------------
## conjoint 2

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$conjoint == 2),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$conjoint == 2),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_conjoint_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$conjoint == 2),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_conjoint_2 <- do.call(rbind.data.frame, out_coef_conjoint_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_conjoint_2 <- ddply(df_out_coef_conjoint_2, c("term"), summarise,
                             mean = mean(estimate),
                             low=quantile(estimate, alpha / 2),
                             high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_conjoint_2)

# include the baseline as a level
coef_sum_conjoint_2 <- rbind(coef_sum_conjoint_2, rigde_baselines)

# include the attribute name as a level
coef_sum_conjoint_2 <- rbind(coef_sum_conjoint_2, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all countries
coef_sum_conjoint_1$conjoint <- "Conjoint 1"
coef_sum_conjoint_2$conjoint <- "Conjoint 2"

coef_sum_conjoint <- rbind(coef_sum_conjoint_1, coef_sum_conjoint_2)

# rename the levels
coef_sum_conjoint$names_final <- ifelse(coef_sum_conjoint$term == "EducationDecrease", "Decrease education spending",
                                        ifelse(coef_sum_conjoint$term == "EducationIncrease", "Increase education spending",
                                               ifelse(coef_sum_conjoint$term == "PensionsDecrease", "Decrease pension spending",
                                                      ifelse(coef_sum_conjoint$term == "PensionsIncrease", "Increase pension spending",
                                                             ifelse(coef_sum_conjoint$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                    ifelse(coef_sum_conjoint$term == "ALMPIncrease", "Increase ALMP spending",
                                                                           ifelse(coef_sum_conjoint$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                  ifelse(coef_sum_conjoint$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                         ifelse(coef_sum_conjoint$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                ifelse(coef_sum_conjoint$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                       ifelse(coef_sum_conjoint$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                              ifelse(coef_sum_conjoint$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))



# exclude the intercept from the df
coef_sum_conjoint <- subset(coef_sum_conjoint, term!= "(Intercept)")

# group the levels by attribute
coef_sum_conjoint$group <- ifelse(coef_sum_conjoint$term == "(Education)" | coef_sum_conjoint$term == "EducationNochange" | coef_sum_conjoint$term == "EducationIncrease" | coef_sum_conjoint$term == "EducationDecrease", 1,
                                  ifelse(coef_sum_conjoint$term == "(Pensions)" | coef_sum_conjoint$term == "PensiosNochange" | coef_sum_conjoint$term == "PensionsIncrease" | coef_sum_conjoint$term == "PensionsDecrease", 2,
                                         ifelse(coef_sum_conjoint$term == "(ALMP)" | coef_sum_conjoint$term ==  "ALMPNochange" | coef_sum_conjoint$term == "ALMPIncrease" | coef_sum_conjoint$term == "ALMPDecrease", 3,
                                                ifelse(coef_sum_conjoint$term == "(PLMP)" | coef_sum_conjoint$term == "PLMPNochange" | coef_sum_conjoint$term == "PLMPIncrease" | coef_sum_conjoint$term == "PLMPDecrease", 4,
                                                       ifelse(coef_sum_conjoint$term == "(Childcare)" | coef_sum_conjoint$term == "ChildcareNochange" | coef_sum_conjoint$term == "ChildcareIncrease" | coef_sum_conjoint$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_conjoint$names_ordered <- factor(coef_sum_conjoint$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                             "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                             "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                             "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                             "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                             "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_conjoint <- ggplot(coef_sum_conjoint, aes(x = names_ordered, colour = conjoint)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = conjoint), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Conjoint number") +
   labs(shape = "Conjoint number") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_conjoint

ggsave(coefplot_conjoint, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a21.eps")

#--------------------------------------------------------
### figure a22: estimated amces screen size ###
#--------------------------------------------------------

###-----------------------------------
## screen: phone

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$screen == "phone"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$screen == "phone"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_screen_1 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$screen == "phone"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_screen_1 <- do.call(rbind.data.frame, out_coef_screen_1)

# calculate mean and higher/lower CI of the estimates
coef_sum_screen_1 <- ddply(df_out_coef_screen_1, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_screen_1)

# include the baseline as a level
rigde_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0)) 

coef_sum_screen_1 <- rbind(coef_sum_screen_1, rigde_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

coef_sum_screen_1 <- rbind(coef_sum_screen_1, ridge_attributes)

###-----------------------------------
## screen: tablet

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$screen == "tablet"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$screen == "tablet"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_screen_2 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$screen == "tablet"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_screen_2 <- do.call(rbind.data.frame, out_coef_screen_2)

# calculate mean and higher/lower CI of the estimates
coef_sum_screen_2 <- ddply(df_out_coef_screen_2, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_screen_2)

# include the baseline as a level
coef_sum_screen_2 <- rbind(coef_sum_screen_2, rigde_baselines)

# include the attribute name as a level
coef_sum_screen_2 <- rbind(coef_sum_screen_2, ridge_attributes)

###-----------------------------------
## screen 3

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$screen == "computer"),],
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$screen == "computer"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate the coefficients with ridge regression
start_time <- Sys.time()
out_coef_screen_3 <- mclapply(1:B, boot.ridge, data = df_cj[which (df_cj$screen == "computer"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef_screen_3 <- do.call(rbind.data.frame, out_coef_screen_3)

# calculate mean and higher/lower CI of the estimates
alpha = .05

coef_sum_screen_3 <- ddply(df_out_coef_screen_3, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(coef_sum_screen_3)

# include the baseline as a level
coef_sum_screen_3 <- rbind(coef_sum_screen_3, rigde_baselines)

# include the attribute name as a level
coef_sum_screen_3 <- rbind(coef_sum_screen_3, ridge_attributes)

###-------------------------------------------
## combine the coefficients from all screens
coef_sum_screen_1$screen <- "Phone"
coef_sum_screen_2$screen <- "Tablet"
coef_sum_screen_3$screen <- "Computer"

coef_sum_screen <- rbind(coef_sum_screen_1, coef_sum_screen_2, coef_sum_screen_3)

## plot the coefficients
# rename the levels
coef_sum_screen$names_final <- ifelse(coef_sum_screen$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(coef_sum_screen$term == "EducationIncrease", "Increase education spending",
                                             ifelse(coef_sum_screen$term == "PensionsDecrease", "Decrease pension spending",
                                                    ifelse(coef_sum_screen$term == "PensionsIncrease", "Increase pension spending",
                                                           ifelse(coef_sum_screen$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                  ifelse(coef_sum_screen$term == "ALMPIncrease", "Increase ALMP spending",
                                                                         ifelse(coef_sum_screen$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                ifelse(coef_sum_screen$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                       ifelse(coef_sum_screen$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                              ifelse(coef_sum_screen$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                     ifelse(coef_sum_screen$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                            ifelse(coef_sum_screen$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))



# exclude the intercept from the df
coef_sum_screen <- subset(coef_sum_screen, term!= "(Intercept)")

# group the levels by attribute
coef_sum_screen$group <- ifelse(coef_sum_screen$term == "(Education)" | coef_sum_screen$term == "EducationNochange" | coef_sum_screen$term == "EducationIncrease" | coef_sum_screen$term == "EducationDecrease", 1,
                                ifelse(coef_sum_screen$term == "(Pensions)" | coef_sum_screen$term == "PensiosNochange" | coef_sum_screen$term == "PensionsIncrease" | coef_sum_screen$term == "PensionsDecrease", 2,
                                       ifelse(coef_sum_screen$term == "(ALMP)" | coef_sum_screen$term ==  "ALMPNochange" | coef_sum_screen$term == "ALMPIncrease" | coef_sum_screen$term == "ALMPDecrease", 3,
                                              ifelse(coef_sum_screen$term == "(PLMP)" | coef_sum_screen$term == "PLMPNochange" | coef_sum_screen$term == "PLMPIncrease" | coef_sum_screen$term == "PLMPDecrease", 4,
                                                     ifelse(coef_sum_screen$term == "(Childcare)" | coef_sum_screen$term == "ChildcareNochange" | coef_sum_screen$term == "ChildcareIncrease" | coef_sum_screen$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum_screen$names_ordered <- factor(coef_sum_screen$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                         "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                         "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                         "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                         "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                         "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

# plot the coefficients
coefplot_screen <- ggplot(coef_sum_screen, aes(x = names_ordered, colour = screen)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = screen), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","(Child benefits)",
                               "Decrease","Increase","No change","(Childcare)",
                               "Decrease","Increase","No change","(PLMP)",
                               "Decrease","Increase","No change","(ALMP)",
                               "Decrease","Increase","No change","(Pension spending)",
                               "Decrease","Increase","No change", "(Education spending)")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   labs(colour = "Screen size") +
   labs(shape = "Screen size") +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Change in Pr(Support for reform package)") +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
coefplot_screen

ggsave(coefplot_screen, width = 17.5, height = 22.5, units = c("cm"), file ="figure_a22.eps")